In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os

In [2]:
# Load the Gen2 phenotypes DataFrame from its pickle file under the user's home directory.
phenotypes_path = os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen2+phenotypes.p')
Gen2_phenotypes = pd.read_pickle(phenotypes_path)

# Preview the five rows with the highest test accuracy.
Gen2_phenotypes.sort_values(by='test_accuracy', ascending=False).head()


Out[2]:
gene_name misclassed test_accuracy test_loss time train_accuracy train_loss
0 lab3000_n1e1p1b2+Gen2+gene26 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 4,... 0.814782 0.854156 141.992026 0.927378 0.342601
0 lab3000_n1e1p1b2+Gen2+gene2 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.808994 0.834695 28.404398 0.955586 0.233952
0 lab3000_n1e1p1b2+Gen2+gene16 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.808549 0.933977 58.579753 0.960040 0.165074
0 lab3000_n1e1p1b2+Gen2+gene12 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.806768 0.912981 41.787451 0.967710 0.090800
0 lab3000_n1e1p1b2+Gen2+gene25 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 4,... 0.806322 0.878398 128.751011 0.965236 0.121338

Misclassification heatmap with Gen2 "best" accuracy model


In [3]:
# Build the misclassification table for the model with the HIGHEST test accuracy
# (first row after sorting by test_accuracy in descending order).
# NOTE(review): 'misclassed' is assumed to hold parallel 'true_class' / 'pred_class'
# lists with one entry per misclassified sample -- confirm against the code that writes it.
best_phenotype = Gen2_phenotypes.sort_values('test_accuracy', ascending=False).iloc[0]
misclassified_df = pd.DataFrame.from_dict(best_phenotype['misclassed'])

# Number of classes; class labels are the integers 0 .. n_classes - 1.
n_classes = 46
class_labels = range(n_classes)

# Count every (predicted, true) pair in a single pass instead of re-filtering the
# frame once per cell of the n_classes x n_classes grid.
# Row indices refer to predicted classes, column headers refer to true classes.
# Pairs that never occur (including classes with no mistakes at all) are filled with 0.
misclass_heat_df = (
    pd.crosstab(misclassified_df['pred_class'], misclassified_df['true_class'])
    .reindex(index=class_labels, columns=class_labels, fill_value=0)
    .rename_axis(index=None, columns=None)
)
# Column headers stay strings ('0' .. '45'), matching the table's previous layout.
misclass_heat_df.columns = [str(label) for label in class_labels]

# Make a heatmap of the misclassification counts.
sns.set(font_scale=3.0)
ax = sns.heatmap(misclass_heat_df, linewidths=.1)
fig = ax.get_figure()
fig.set_size_inches(20, 15)

# Put the true-class axis (ticks and label) along the top edge.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
ax.set_xlabel('True Classes', labelpad=20)
ax.xaxis.label.set_fontsize(28)
ax.set_ylabel('Predicted Classes', labelpad=10)
ax.yaxis.label.set_fontsize(28)

# Caption, positioned in figure coordinates.
txt = '''Misclassification Counts'''
fig.text(0.3, .1, txt)

# Use a small fixed font size for the tick labels on both axes.
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(12)

fig.savefig('Gen2_best_accuracy_misclass_heatmap.png')


Misclassification heatmap with Gen2 "worst" accuracy model


In [4]:
# Build the misclassification table for the model with the LOWEST test accuracy
# (last row after sorting by test_accuracy in descending order).
# NOTE(review): 'misclassed' is assumed to hold parallel 'true_class' / 'pred_class'
# lists with one entry per misclassified sample -- confirm against the code that writes it.
worst_phenotype = Gen2_phenotypes.sort_values('test_accuracy', ascending=False).iloc[-1]
misclassified_df = pd.DataFrame.from_dict(worst_phenotype['misclassed'])

# Number of classes; class labels are the integers 0 .. n_classes - 1.
n_classes = 46
class_labels = range(n_classes)

# Count every (predicted, true) pair in a single pass instead of re-filtering the
# frame once per cell of the n_classes x n_classes grid.
# Row indices refer to predicted classes, column headers refer to true classes.
# Pairs that never occur (including classes with no mistakes at all) are filled with 0.
misclass_heat_df = (
    pd.crosstab(misclassified_df['pred_class'], misclassified_df['true_class'])
    .reindex(index=class_labels, columns=class_labels, fill_value=0)
    .rename_axis(index=None, columns=None)
)
# Column headers stay strings ('0' .. '45'), matching the table's previous layout.
misclass_heat_df.columns = [str(label) for label in class_labels]

# Make a heatmap of the misclassification counts.
sns.set(font_scale=3.0)
ax = sns.heatmap(misclass_heat_df, linewidths=.1)
fig = ax.get_figure()
fig.set_size_inches(20, 15)

# Put the true-class axis (ticks and label) along the top edge.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
ax.set_xlabel('True Classes', labelpad=20)
ax.xaxis.label.set_fontsize(28)
ax.set_ylabel('Predicted Classes', labelpad=10)
ax.yaxis.label.set_fontsize(28)

# Caption, positioned in figure coordinates.
txt = '''Misclassification Counts'''
fig.text(0.3, .1, txt)

# Use a small fixed font size for the tick labels on both axes.
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(12)

fig.savefig('Gen2_worst_accuracy_misclass_heatmap.png')



In [5]:
# Display (rows, columns) of the most recently built misclassification table.
misclassified_df.shape


Out[5]:
(1772, 2)